#!/usr/bin/env python
# coding: utf-8

# In[19]:


import pandas as pd
abstract_tagger = pd.read_csv("./output/abstract_tagger.csv")
article_list = abstract_tagger['Article'].unique()
article_count = len(article_list)


def _copy_wiki_fields_to_sentences(frame, article_rows):
    """Copy wiki abstract fields onto sentence rows that share the same TEXT.

    For every row of ``article_rows`` tagged ``'sentence'``, look up the
    first row tagged ``'wiki'`` (in row order) whose ``TEXT`` is equal and
    copy the columns ``'abstract'`` through ``'wiki_abstract_key'`` from
    that wiki row onto the sentence row of ``frame``.  Rows carrying any
    other tag are ignored, and sentence rows without a matching wiki row
    are left untouched.

    Args:
        frame: The full table; it is modified in place.
        article_rows: The rows of ``frame`` that belong to one article,
            carrying the same index labels as in ``frame``.
    """
    tags = article_rows['tag']
    wiki_text = article_rows.loc[tags == 'wiki', 'TEXT']
    sentence_text = article_rows.loc[tags == 'sentence', 'TEXT']

    # Remember only the first wiki row per TEXT value: that is the row a
    # pairwise scan over the wiki rows would stop at.
    first_wiki_by_text = {}
    for wiki_i, text in wiki_text.items():
        # A missing TEXT never compares equal to anything, so it can never
        # be the target of a match.
        if pd.notna(text):
            first_wiki_by_text.setdefault(text, wiki_i)

    for sen_i, text in sentence_text.items():
        if pd.isna(text):
            continue
        wiki_i = first_wiki_by_text.get(text)
        # Compare against None explicitly: index label 0 is a valid match.
        if wiki_i is None:
            continue
        frame.loc[sen_i, 'abstract':'wiki_abstract_key'] = frame.loc[wiki_i, 'abstract':'wiki_abstract_key']


for position, article in enumerate(article_list):
    # Progress report: current article and how many are still to process.
    print(position, article)
    print("Remain: ", article_count - position)
    same_article_rows = abstract_tagger[abstract_tagger["Article"] == article]
    _copy_wiki_fields_to_sentences(abstract_tagger, same_article_rows)

# No ``encoding`` argument: pandas deprecated it for ``to_excel`` in 1.5
# (it was unused) and removed it in 2.0, where passing it raises TypeError.
abstract_tagger.to_excel("./output/sentence_to_wiki.xlsx")
print("done")

